Smiley face

Our Algorithm_MLCS(Diagonal Extension and Domination, 2017)


Main features
Abstract of the paper [Chan 2017]

Given a pair of merging sequences A, B and a target sequence T, the merged longest common subsequence (MLCS) problem is to find out the longest common subsequence (LCS) between sequences E ( A, B ) and T, where E ( A, B ) is obtained from merging two subsequences of A and B. In this paper, we first propose an algorithm for solving the MLCS problem in O (n|Σ| + (r − L + 1)Lm) time and O (n|Σ| + Lm) space, where r and L denote the lengths of T and MLCS, respectively, and m and n denote the shorter and longer lengths of A and B , respectively. From the time complexity, it is clear that our algorithm is very efficient when T and E ( A, B ) are very similar. With slight modification, our algorithm can also solve another merged LCS problem variant, the block-merged LCS (BMLCS) problem, in O (n|Σ| + (r − L + 1)Lδ) time and O (n|Σ| + Lδ) space, where δ denotes the larger number of blocks of A and B. Experimental results show that our algorithms are faster than other previously published MLCS and BMLCS algorithms for sequences with high similarities. The source codes and datasets for experiments can be found on our web site http://par.cse.nsysu.edu.tw/~mlcs/ [20].

C++ source code
int MLCS_Ours(vector< int> T, vector< int> A, vector< int>B, int Tlen, int Alen, int Blen, int size)
{
    vector< vector< int> > nextA(size + 1, vector< int>(Alen + 1, 0));
    vector< vector< int> > nextB(size + 1, vector< int>(Blen + 1, 0));
    int L = 0;

    int Alenplusone = Alen + 1;
    int Blenplusone = Blen + 1;
    int Lmaxsize = (Alen + Blen < Tlen ? Alen + Blen : Tlen); // Max LCS length

    int Dmaxsize = (Alen < Blen ? Blen : Alen); // Max size of a dominating set

    int **Dx = new int *[Lmaxsize + 2];
    int **Dy = new int *[Lmaxsize + 2];
    for (int i = 0; i < Lmaxsize + 2; ++i) {
        Dx[i] = new int[Dmaxsize + 2];
        Dy[i] = new int[Dmaxsize + 2];
    }
    int *WAx = new int[Dmaxsize + 2];
    int *WAy = new int[Dmaxsize + 2];
    int *WBx = new int[Dmaxsize + 2];
    int *WBy = new int[Dmaxsize + 2];
    int *tempDx = new int[Dmaxsize + 2];
    int *tempDy = new int[Dmaxsize + 2];
    // Dmaxsize=smaller of Alen and Blen, cannot set to Tlenvector< vector< int> > Dx(Lmaxsize + 2, vector< int>(Dmaxsize, -1));
    
    //construct array of nextA
    for (int i = 1; i <= size; i++) {
        nextA[i][Alen] = Alen + 1;
        nextB[i][Blen] = Blen + 1;
    }
    for (int j = Alen; j >= 1; j--) {
        for (int i = 1; i <= size; i++)
            nextA[i][j - 1] = nextA[i][j];
        nextA[A[j]][j - 1] = j;  //  need update only if (A[j] == i)
    }

    //construct array of nextB
    for (int j = Blen; j >= 1; j--) {
        for (int i = 1; i <= size; i++)
            nextB[i][j - 1] = nextB[i][j];
        nextB[B[j]][j - 1] = j;  //  need update only if (B[j] == i)
    }


    Dx[0][1] = 0;
    Dy[0][1] = 0;
    Dx[0][2] = Alenplusone;
    Dy[0][2] = 0;

    for (int s = 1; s <= Lmaxsize; ++s) {
        Dx[s][1] = Alenplusone;
        Dy[s][1] = 0;
    }

    int PosT = 0;
    for (int i = 1; i <= Tlen; ++i) {
        if (i > Tlen - L)
            break;
        for (int s = 1; s <= Tlen - i + 1; ++s) {
            PosT = i + s - 1;
            //(Dk−1,s−1) Extension of A, extension of B
            int j, k;
            int sMinusOne = s - 1;
            for (j = 1, k = 1; Dx[sMinusOne][j] != Alenplusone; ++j) {
                WAx[j] = nextA[T[PosT]][Dx[sMinusOne][j]];
                WAy[j] = Dy[sMinusOne][j];
                WBx[j] = Dx[sMinusOne][j];   // some  are added
                WBy[j] = nextB[T[PosT]][Dy[sMinusOne][j]];
            }
            WAx[j] = Alenplusone;
            WBx[j] = Alenplusone;

            tempDx[1] = Alenplusone;

            // Dominate(Dk−1,s ∪ ExtA )
            if ((Dx[s][1] != Alenplusone) || (WAx[1] != Alenplusone)) {
                Dominate(Dx[s], Dy[s], WAx, WAy, tempDx, tempDy, Alenplusone, Blenplusone);
            }
            // Dominate(Dominate(Dk−1,s ∪ ExtA)∪ExtB)
            if ((tempDx[1] != Alenplusone) || (WBx[1] != Alenplusone)) {
                Dominate(tempDx, tempDy, WBx, WBy, Dx[s], Dy[s], Alenplusone, Blenplusone);
            }


            if ((Dx[s][1] == Alenplusone))
                break;

            if (s > L)
            {
                L = s;
            }

        }//end for j
    }//end for i

     // delete the allocated array Dx[][], Dy[][]
    for (int i = 0; i < Lmaxsize + 2; ++i) {
        delete[] Dx[i];
        delete[] Dy[i];
    }
    delete[] Dx;    delete[] Dy;
    delete[] WAx;   delete[] WAy;   delete[] WBx;   delete[] WBy;
    delete[] tempDx;    delete[] tempDy;
    return L;
}

void Dominate(int *L1x, int *L1y, int *L2x, int *L2y, int *Rx, int *Ry, int Alenplusone, int Blenplusone) {

    int i = 1, j = 1, tempx, tempy;
    int Rsizeminusone = 0;   // compare starting from [0]

    Rx[0] = -1;
    Ry[0] = Blenplusone;   // [0] used for discard 
    Rx[1] = Alenplusone;
    Ry[1] = Blenplusone;
    while ((L1x[i] != Alenplusone) || (L2x[j] != Alenplusone)) {
        // ← the non-null smaller one of the first 2-tuples of L1 and L2
        if (L1x[i] <= L2x[j]) { //<3 , 2> is smaller than <4 , 1>
                                //  tempx = L1x[i];
                                //  tempy = L1y[i];
            if (L1y[i] >= Ry[Rsizeminusone])  // && L2x[j] >= Rx[Rsizeminusone]
                ;   // no operation, discard L2
            else if (L1x[i] > Rx[Rsizeminusone] && L1y[i] < Ry[Rsizeminusone]) {
                ++Rsizeminusone;
                Rx[Rsizeminusone] = L1x[i];
                Ry[Rsizeminusone] = L1y[i];
            }
            else {
                Rx[Rsizeminusone] = L1x[i];
                Ry[Rsizeminusone] = L1y[i];
            } // else discard L1
            ++i;
        }
        else {
            //  tempx = L2x[j];
            //  tempy = L2y[j];
            if (L2y[j] >= Ry[Rsizeminusone])  // && L2x[j] >= Rx[Rsizeminusone]
                ;   // no operation, discard L2
            else if (L2x[j] > Rx[Rsizeminusone] && L2y[j] < Ry[Rsizeminusone]) {
                ++Rsizeminusone;
                Rx[Rsizeminusone] = L2x[j];
                Ry[Rsizeminusone] = L2y[j];
            }
            //    else if (L2x[j] <= Rx[Rsizeminusone] && L2y[j] <= Ry[Rsizeminusone]) {
            else {
                Rx[Rsizeminusone] = L2x[j];
                Ry[Rsizeminusone] = L2y[j];
            }
            ++j;
        }
    }
    ++Rsizeminusone;
    Rx[Rsizeminusone] = Alenplusone;
    Ry[Rsizeminusone] = 0;

}


The files
  All_MLCS.cpp

Reference
Kuo-Tsung Tseng, De-Sheng Chan, Chang-Biau Yang, and Shou-Fu Lo, “Efficient Merged Longest Common Subsequence Algorithms for Similar Sequences,” Theoretical Computer Science 708 (2018) 75–90.